/*==============================================================================
This do-file builds US state-level and region-level aggregates on:

I. 		Age structure
II. 	Educational attainment
III. 	Household size, household ownership
IV. 	Migration
		
Source of data used in this dofile: 
US Census 1980
US Census 1990
US Census 2000
ACS 2000-2011 

Notes: 
1) In the ACS, migration is asked over the previous year. In the Census, 
	the question pertains to migration over the last 5 years.
2) In the 1980 Census, migration is a 50% sample. 

==============================================================================*/

* ------------------------------------------------------------------------------
* Set-up: start from an empty session, move to the raw-data folder, unpack the
* extract and run its companion do-file (presumably the IPUMS-supplied loader
* for usa_00075.dat -- confirm).
* ------------------------------------------------------------------------------
clear
set more off

cd "/scratch/public/hysteresis_files/IPUMS USA Data"

shell uncompress usa_00075.dat.Z
do usa_00075.do

* Source flags. A row is treated as Census when migplac1 (the one-year
* migration variable) is system-missing, and as ACS otherwise, so the two
* flags are exact complements of each other.
gen census = (migplac1==.)
gen acs    = !census

* ==============================================================================
* I. Age structure
* ==============================================================================

* One indicator (1 / missing) per five-year age band: 0-4, 5-9, ..., 65-69.
* The indicators are summed with person weights in the collapse step.
forvalues lo = 0(5)65 {
	local hi = `lo' + 4
	quietly generate ts_age_`lo'_`hi' = 1 if inrange(age, `lo', `hi')
}

* Open-ended top band.
* NOTE: Stata orders missing values above every number, so a row with a
* missing age would also satisfy age>=70 and be flagged here.
quietly generate ts_age_70_plus = 1 if age>=70

* ==============================================================================
* II. Educational attainment
* ==============================================================================

* Every attainment indicator is restricted to persons aged 15 or more with
* gradeatt==0 (presumably "not currently attending school" -- confirm against
* the IPUMS codebook).
local done_school "age>=15 & gradeatt==0"

* Broad attainment groups, defined on the detailed education code educd:
*   less_than_primary : less than a sixth grade education
*   primary           : more than primary, but less than a high school diploma
*                       or equivalent
*   secondary         : high school diploma or equivalent, and some college
*                       but no degree
*   university        : four years of college or better
gen ts_edatt_less_than_primary = 1 if educd<=22 & `done_school'
gen ts_edatt_primary           = 1 if inrange(educd, 23, 50) & `done_school'
gen ts_edatt_secondary         = 1 if inrange(educd, 60, 90) & `done_school'
gen ts_edatt_university        = 1 if inrange(educd, 100, 116) & `done_school'

* Row total of the four group indicators above (missing counts as zero).
* Computed before the years-of-school indicators exist, so the ts_edatt*
* wildcard only picks up the four groups.
egen ts_edatt_total = rowtotal(ts_edatt*)

* Years of school: educd codes assigned to each year count from 0 to 22.
local yrs_0  "inrange(educd, 0, 2) | educd==11"
local yrs_1  "educd==12"
local yrs_2  "inlist(educd, 10, 14)"
local yrs_3  "educd==15"
local yrs_4  "inlist(educd, 13, 16)"
local yrs_5  "educd==17"
local yrs_6  "educd==22"
local yrs_7  "inlist(educd, 21, 23)"
local yrs_8  "inlist(educd, 20, 25)"
local yrs_9  "inlist(educd, 24, 26)"
local yrs_10 "educd==30"
local yrs_11 "educd==40"
local yrs_12 "educd==50"
local yrs_13 "inrange(educd, 60, 64)"
local yrs_14 "inrange(educd, 65, 71)"
local yrs_15 "inrange(educd, 80, 83)"
local yrs_16 "educd==90"
local yrs_17 "inrange(educd, 100, 101)"
local yrs_18 "educd==110"
local yrs_19 "inlist(educd, 111, 114, 115)"
local yrs_20 "educd==112"
local yrs_21 "educd==113"
local yrs_22 "educd==116"

* One indicator (1 / missing) per year count, built from the table above.
forvalues n = 0/22 {
	gen ts_edatt_yrs_`n' = .
	replace ts_edatt_yrs_`n' = 1 if (`yrs_`n'') & `done_school'
}



* ==============================================================================
* III. 	Household size, household ownership
* ==============================================================================

* Household-level variables are filled in on exactly one person row per
* household (n==1) so that each household is counted once in the collapse.
*
* IPUMS household serial numbers are unique only WITHIN a sample, and this file
* stacks many samples (several years, plus both Census 2000 and ACS 2000).
* Grouping on serial alone would pool unrelated households that happen to share
* a serial number and keep just one of them. The group therefore also includes
* year and the acs flag, which together separate the stacked samples.
* NOTE(review): if the extract carries SAMPLE (or DATANUM), grouping on
* sample + serial is the canonical household key -- confirm and switch.
bysort year acs serial: gen n = _n

* Number of person records in the household, for gq==1 households only
* (presumably regular households rather than group quarters -- confirm).
gen hh_size = numprec if gq==1 & n==1 //one observation per household

* Tenure indicators (presumably ownershp 1 = owned, 2 = rented -- confirm).
gen ts_home_own    = 1 if ownershp==1 & gq==1 & n==1
gen ts_home_no_own = 1 if ownershp==2 & gq==1 & n==1

* ==============================================================================
* Create regions
* ==============================================================================

* Map the state of residence (statefip) to a region code 840001-840010.
* Any statefip value not listed below leaves region missing.
gen region = .
replace region = 840001 if inlist(statefip, 9, 23, 25, 33, 44, 50)
replace region = 840002 if inlist(statefip, 34, 36, 42)
replace region = 840003 if inlist(statefip, 10, 11, 12, 13, 14, 24, 37, 45, 51, 54)
replace region = 840004 if inlist(statefip, 1, 21, 28, 47)
replace region = 840005 if inlist(statefip, 5, 22, 40, 48)
replace region = 840006 if inlist(statefip, 17, 18, 26, 39, 55)
replace region = 840007 if inlist(statefip, 19, 20, 27, 29, 31, 38, 46)
replace region = 840008 if inlist(statefip, 4, 8, 16, 30, 32, 35, 49, 56)
replace region = 840009 if inlist(statefip, 2, 15, 41, 53)
replace region = 840010 if statefip == 6

* Migration

* Region code (840001-840010) of the state of previous residence, using the
* same state-to-region assignment as the region variable. migplac1 is
* processed first and migplac5 second, so where both hold a listed state code
* the migplac5 assignment is the one that remains.
gen migplac_wvs_region = .

foreach src of varlist migplac1 migplac5 {
	replace migplac_wvs_region = 840001 if inlist(`src', 9, 23, 25, 33, 44, 50)
	replace migplac_wvs_region = 840002 if inlist(`src', 34, 36, 42)
	replace migplac_wvs_region = 840003 if inlist(`src', 10, 11, 12, 13, 14, 24, 37, 45, 51, 54)
	replace migplac_wvs_region = 840004 if inlist(`src', 1, 21, 28, 47)
	replace migplac_wvs_region = 840005 if inlist(`src', 5, 22, 40, 48)
	replace migplac_wvs_region = 840006 if inlist(`src', 17, 18, 26, 39, 55)
	replace migplac_wvs_region = 840007 if inlist(`src', 19, 20, 27, 29, 31, 38, 46)
	replace migplac_wvs_region = 840008 if inlist(`src', 4, 8, 16, 30, 32, 35, 49, 56)
	replace migplac_wvs_region = 840009 if inlist(`src', 2, 15, 41, 53)
	replace migplac_wvs_region = 840010 if `src' == 6
}

* migration = 1 for domestic movers BETWEEN states: the previous state of
* residence is a code in 1-56 and differs from the current state. Census rows
* use the five-year variable (migplac5); ACS rows use the one-year variable
* (migplac1). Everyone else is left missing.
gen migration = 1 if (statefip!=migplac5 & migplac5>0 & migplac5<=56 & census==1) | (statefip!=migplac1 & migplac1>0 & migplac1<=56 & acs==1)

* Single "previous state" variable, taken from whichever question applies to
* the row's data source.
gen migplac = migplac5 if census==1
replace migplac = migplac1 if acs==1

********************************************************************************
* Collapse to state-by-year and region-by-year cells
********************************************************************************

* Snapshot of the person-level file; each pass below reloads one year from it.
save temp_precollapse.dta, replace

* Filter selecting domestic between-state movers (same condition that defines
* the migration indicator): Census rows via migplac5, ACS rows via migplac1.
local movers "(statefip!=migplac5 & migplac5>0 & migplac5<=56 & census==1)|(statefip!=migplac1 & migplac1>0 & migplac1<=56 & acs==1)"

* Collapse by state, Census region used in WVS, and year.
* Each year is processed separately to speed up collapse.
*
* A tempfile macro is dereferenced as `name' with no extension; the macro
* already holds the complete temporary path.

foreach year in 1980 1990 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 {

	display "`year'"

	* --- Age, schooling and household variables -------------------------------
	* acs is part of every by() list because year 2000 contains both Census
	* and ACS rows.

	* US states: weighted sums of all ts* indicators, weighted mean of hh_size
	use temp_precollapse.dta if year==`year', clear
	collapse (sum) ts* (mean) hh_size [fw=perwt], by(statefip year acs) fast
	rename hh_size ts_hh_size

	sort statefip
	tempfile US_states_temp1
	save "`US_states_temp1'"

	* Census regions: same statistics by region
	use temp_precollapse.dta if year==`year', clear
	collapse (sum) ts* (mean) hh_size [fw=perwt], by(region year acs) fast
	rename hh_size ts_hh_size

	sort region
	tempfile Census_regions_temp1
	save "`Census_regions_temp1'"

	* --- In-migration: movers counted where they live NOW ---------------------

	* US states
	use temp_precollapse.dta if year==`year', clear
	keep if `movers' //keep domestic movers only
	collapse (sum) migration [fw=perwt], by(statefip acs year)
	rename migration ts_inmigration

	sort statefip
	tempfile US_states_inmigration
	save "`US_states_inmigration'"

	* Census regions: state-level in-flows summed within region, so moves
	* between two states of the same region are included
	use temp_precollapse.dta if year==`year', clear
	keep if `movers'
	collapse (sum) migration [fw=perwt], by(statefip region year acs)
	collapse (sum) migration, by(region year acs)
	rename migration ts_inmigration

	sort region
	tempfile Census_regions_inmigration
	save "`Census_regions_inmigration'"

	* --- Out-migration: movers counted where they lived BEFORE ----------------

	* US states: collapse by previous state, then relabel it as statefip so
	* the file can be merged with the other state-level files
	use temp_precollapse.dta if year==`year', clear
	keep if `movers'
	collapse (sum) migration [fw=perwt], by(migplac year acs)
	rename migration ts_outmigration
	rename migplac statefip

	sort statefip
	tempfile US_states_outmigration
	save "`US_states_outmigration'"

	* Census regions: previous-state out-flows summed within previous region
	use temp_precollapse.dta if year==`year', clear
	keep if `movers'
	collapse (sum) migration [fw=perwt], by(migplac migplac_wvs_region acs year)
	collapse (sum) migration, by(migplac_wvs_region acs year)
	rename migration ts_outmigration
	* Full variable name spelled out: only migplac_wvs_region survives the
	* collapse, so the short name migplac would resolve only by abbreviation.
	rename migplac_wvs_region region

	sort region
	tempfile Census_regions_outmigration
	save "`Census_regions_outmigration'"

	* --- Assemble the year: regions first, then states, then stack ------------
	use "`Census_regions_temp1'", clear
	merge 1:1 region year acs using "`Census_regions_outmigration'", gen(_merge_regions_out)
	merge 1:1 region year acs using "`Census_regions_inmigration'", gen(_merge_regions_in)

	tempfile Census_regions_all
	save "`Census_regions_all'"

	use "`US_states_temp1'", clear
	merge 1:1 statefip year acs using "`US_states_outmigration'", gen(_merge_states_out)
	merge 1:1 statefip year acs using "`US_states_inmigration'", gen(_merge_states_in)

	append using "`Census_regions_all'"

	drop _merge*

	* Length of the migration reference period: 1 year (ACS), 5 years (Census)
	gen ts_migration_yrs = 1 if acs==1
	replace ts_migration_yrs = 5 if acs==0

	* Locals set inside the loop remain defined after it, so the per-year
	* tempfiles can be stacked below.
	tempfile ts_`year'
	save "`ts_`year''"

}

* Stack all years into one file
use "`ts_1980'", clear
foreach year in 1990 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 {
	append using "`ts_`year''"
}

* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Multiply 1980 migration by 2
* (in the 1980 Census the migration question is a 50% sample, so the weighted
* counts are doubled)
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

foreach var of varlist ts_inmigration ts_outmigration {
	replace `var' = `var'*2 if year==1980
}

* Single geography identifier: the state code on state rows, and the region
* code on region rows (where statefip is missing).
gen nuts = statefip
replace nuts = region if statefip==.

* NOTE: the global dta_files is not set in this do-file; it must be defined by
* the caller before this line runs.
save "$dta_files/TS_US_CENS.dta", replace

* Clean up: remove the intermediate snapshot and re-compress the raw extract.
* The file name matches the one uncompressed at the top (usa_00075).
capture erase temp_precollapse.dta
! compress usa_00075.dat
